R Markdown

#checking nulls

percent_nulls(dat) %>%
  arrange(desc(percent_null)) %>%
  mutate(
    percent_null = round(percent_null, 2)  
  ) %>%
  kable(
    col.names = c("Variable", "Percent Missing"),
    caption = "Percentage of Missing Values by Variable"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  )
Percentage of Missing Values by Variable
Variable Percent Missing
CD4 count(Last two months) 98.74
CD4 count(First two months) 98.69
EPTB Others 98.26
Culture S 98.25
Culture H 98.21
Culture E 98.18
Lipa Hain Isoniazid 97.55
Culture R 97.44
Lipa Hain Rifampicin) 96.96
TB LAM 96.21
Culture Date 93.26
Culture Result 93.18
Xray Date 92.45
GeneXpert Date 89.26
EPTB Sub Type 85.96
0th Month Serial No and Quantification 80.77
5th Month Serial No and Quantification 76.90
Sputum Smear Examination 0th Month Result Date 76.26
Sputam Smear Examination 5th Month Result Date 75.52
6by8 Month Serial No and Quantification 73.68
Sputum Smear Examination 6by8 Month Result Date 69.36
Comorbidity 68.30
2by3 Month Serial No and Quantification 67.77
Sputum Smear Examination 2by3 Month Result Date 64.81
Xray 48.63
GeneXpert 1.32
Sputum Smear Examination 2by3 Month Result 0.69
ownertype 0.61
Is Patient Prisoner 0.36
Sputum Smear Examination 6by8 Month Result 0.31
Sputam Smear Examination 5th Month Result 0.31
Treatment Outcome Date 0.22
Regimen 0.21
Nutrition Support 0.19
kephlevel 0.13
BMI 0.03
Height (Mtrs) 0.02
Health Facility 0.02
Sector 0.00
Sub County Registration Number 0.00
Sputum Smear Examination 0th Month Result 0.00
County 0.00
Zone 0.00
Treatment Outcome 0.00
Serial Number 0.00
Date of Registration 0.00
Month 0.00
Region 0.00
Sub County 0.00
Year 0.00
Quarter 0.00
Sex (M/F) 0.00
Age on Registration 0.00
Weight (Kgs) 0.00
BMI/Z-Score 0.00
DOT by 0.00
Type of TB(P/EP) 0.00
Type of patient 0.00
Date of treatment started 0.00
Referred BY: VCT/HCC/STI/HBC/PS/ANC/SR/CI 0.00
Referred TO: VCT/HCC/STI/HBC/PS/ANC 0.00
mfl_code 0.00
dat2 %>%
   count(County) %>%
   mutate(
     percentage = round(n / sum(n) * 100, 2)
   ) %>%
   arrange(desc(n)) %>%
   flextable() %>%
   autofit()

County

n

percentage

Nairobi

74,676

14.33

Kiambu

27,393

5.26

Meru

24,892

4.78

Mombasa

24,804

4.76

Nakuru

20,595

3.95

Machakos

15,811

3.04

Turkana

15,398

2.96

Siaya

14,472

2.78

Kitui

14,399

2.76

Kisumu

14,004

2.69

Kakamega

13,793

2.65

Homa Bay

13,728

2.64

Murang'a

12,748

2.45

Kericho

12,331

2.37

Kisii

11,586

2.22

Makueni

11,344

2.18

Kilifi

11,102

2.13

Bomet

10,616

2.04

Bungoma

10,150

1.95

Uasin Gishu

10,060

1.93

Migori

9,950

1.91

Embu

9,789

1.88

Kajiado

9,591

1.84

Pokot

9,483

1.82

Narok

9,193

1.76

Nyeri

8,720

1.67

Kirinyaga

8,211

1.58

Trans Nzoia

7,191

1.38

Busia

6,562

1.26

Tharaka Nithi

6,491

1.25

Laikipia

5,966

1.15

Kwale

5,822

1.12

Garissa

5,661

1.09

Vihiga

5,154

0.99

Baringo

4,862

0.93

Nyamira

4,853

0.93

Nandi

4,722

0.91

Nyandarua

4,268

0.82

Samburu

4,134

0.79

Marsabit

3,874

0.74

Mandera

3,834

0.74

Isiolo

3,783

0.73

Taita Taveta

3,529

0.68

Wajir

3,475

0.67

Elgeyo Marakwet

3,422

0.66

Tana River

2,597

0.50

Lamu

1,913

0.37

1

0.00

# Summarize counts per county
county_summary <- dat2 %>%
  count(County) %>%
  mutate(
    percentage = round(n / sum(n) * 100, 2)
  ) %>%
  arrange(desc(n))

# Add a total row
# Summarize counts per county
county_summary <- dat2 %>%
  count(County) %>%
  mutate(
    percentage = round(n / sum(n) * 100, 2),
    total_cases = sum(n)           # same total repeated for each row
  ) %>%
  arrange(desc(n))
county_summary <- dat2 %>%
  filter(!is.na(County)) %>%             # remove rows with missing county
  count(County) %>%
  mutate(
    total_cases = sum(n),                # total TB cases repeated for each row
    percentage = round(n / total_cases * 100, 2)
  ) %>%
  arrange(desc(n)) %>%
  select(County, n, total_cases, percentage)  # reorder columns
# Print table with kableExtra
county_summary %>%
  kable(
    col.names = c("County", "Number of Patients", "Total TB Cases", "Percentage of Total"),
    caption = "TB Cases Distribution by County"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  )
TB Cases Distribution by County
County Number of Patients Total TB Cases Percentage of Total
Nairobi 74676 520952 14.33
Kiambu 27393 520952 5.26
Meru 24892 520952 4.78
Mombasa 24804 520952 4.76
Nakuru 20595 520952 3.95
Machakos 15811 520952 3.04
Turkana 15398 520952 2.96
Siaya 14472 520952 2.78
Kitui 14399 520952 2.76
Kisumu 14004 520952 2.69
Kakamega 13793 520952 2.65
Homa Bay 13728 520952 2.64
Murang’a 12748 520952 2.45
Kericho 12331 520952 2.37
Kisii 11586 520952 2.22
Makueni 11344 520952 2.18
Kilifi 11102 520952 2.13
Bomet 10616 520952 2.04
Bungoma 10150 520952 1.95
Uasin Gishu 10060 520952 1.93
Migori 9950 520952 1.91
Embu 9789 520952 1.88
Kajiado 9591 520952 1.84
Pokot 9483 520952 1.82
Narok 9193 520952 1.76
Nyeri 8720 520952 1.67
Kirinyaga 8211 520952 1.58
Trans Nzoia 7191 520952 1.38
Busia 6562 520952 1.26
Tharaka Nithi 6491 520952 1.25
Laikipia 5966 520952 1.15
Kwale 5822 520952 1.12
Garissa 5661 520952 1.09
Vihiga 5154 520952 0.99
Baringo 4862 520952 0.93
Nyamira 4853 520952 0.93
Nandi 4722 520952 0.91
Nyandarua 4268 520952 0.82
Samburu 4134 520952 0.79
Marsabit 3874 520952 0.74
Mandera 3834 520952 0.74
Isiolo 3783 520952 0.73
Taita Taveta 3529 520952 0.68
Wajir 3475 520952 0.67
Elgeyo Marakwet 3422 520952 0.66
Tana River 2597 520952 0.50
Lamu 1913 520952 0.37
county_summary <- county_summary %>%
  mutate(County = factor(County, levels = rev(County)),
         n_thousands = n / 1000)  # convert to thousands

ggplot(county_summary, aes(x = County, y = n_thousands, fill = County)) +
  geom_col() +
  scale_fill_grey(start = 0.3, end = 0.8) +
  coord_flip() +
  labs(
    title = "TB Cases Distribution by County",
    x = "County",
    y = "Number of TB Patients (thousands)"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none") +
  scale_y_continuous(expand = c(0,0)) 

ggplotly(p, tooltip = "text")
dat2 %>%
   count(`Gender`) %>%
   mutate(percentage = n / sum(n) * 100) %>%
   flextable()

Gender

n

percentage

Female

183,455

35.21527

Male

337,498

64.78473

dat3 <- dat2 %>%
  mutate(
    County = trimws(County),  # remove extra spaces
    County = case_when(
      County == "Tharaka Nithi"     ~ "Tharaka-Nithi",
      County == "Elgeyo Marakwet"   ~ "Elgeyo-Marakwet",
      County == "Pokot"             ~ "West Pokot",
      TRUE ~ County
    )
  )
tb_calc %>%
  select(
    County,
    Total_Population19,
    TB_cases,
    overall_percent,
    prevalence_per_100k
  ) %>%
  arrange(desc(TB_cases)) %>%
  flextable() %>%
  set_header_labels(
    County = "County",
    Total_Population19 = "Population (2019)",
    TB_cases = "TB Cases",
    overall_percent = "Overall (%)",
    prevalence_per_100k = "Prevalence / 100,000"
  ) %>%
  autofit()

County

Population (2019)

TB Cases

Overall (%)

Prevalence / 100,000

Nairobi

4,397,073

74,676

14.33

1,698.3

Kiambu

2,417,735

27,393

5.26

1,133.0

Meru

1,545,714

24,892

4.78

1,610.4

Mombasa

1,208,333

24,804

4.76

2,052.7

Nakuru

2,162,202

20,595

3.95

952.5

Machakos

1,421,932

15,811

3.04

1,111.9

Turkana

926,976

15,398

2.96

1,661.1

Siaya

993,183

14,472

2.78

1,457.1

Kitui

1,136,187

14,399

2.76

1,267.3

Kisumu

1,155,574

14,004

2.69

1,211.9

Kakamega

1,867,579

13,793

2.65

738.5

Homa Bay

1,131,950

13,728

2.64

1,212.8

Murang'a

1,056,640

12,748

2.45

1,206.5

Kericho

901,777

12,331

2.37

1,367.4

Kisii

1,266,860

11,586

2.22

914.5

Makueni

987,653

11,344

2.18

1,148.6

Kilifi

1,453,787

11,102

2.13

763.7

Bomet

875,689

10,616

2.04

1,212.3

Bungoma

1,670,570

10,150

1.95

607.6

Uasin Gishu

1,163,186

10,060

1.93

864.9

Migori

1,116,436

9,950

1.91

891.2

Embu

608,599

9,789

1.88

1,608.4

Kajiado

1,117,840

9,591

1.84

858.0

West Pokot

621,241

9,483

1.82

1,526.5

Narok

1,157,873

9,193

1.76

794.0

Nyeri

759,164

8,720

1.67

1,148.6

Kirinyaga

610,411

8,211

1.58

1,345.2

Trans Nzoia

990,341

7,191

1.38

726.1

Busia

893,681

6,562

1.26

734.3

Tharaka-Nithi

393,177

6,491

1.25

1,650.9

Laikipia

518,580

5,966

1.15

1,150.4

Kwale

866,820

5,822

1.12

671.7

Garissa

841,353

5,661

1.09

672.8

Vihiga

590,013

5,154

0.99

873.5

Baringo

666,763

4,862

0.93

729.2

Nyamira

605,576

4,853

0.93

801.4

Nandi

885,711

4,722

0.91

533.1

Nyandarua

638,289

4,268

0.82

668.7

Samburu

310,327

4,134

0.79

1,332.1

Marsabit

459,785

3,874

0.74

842.6

Mandera

867,457

3,834

0.74

442.0

Isiolo

268,002

3,783

0.73

1,411.6

Taita Taveta

340,671

3,529

0.68

1,035.9

Wajir

781,263

3,475

0.67

444.8

Elgeyo-Marakwet

454,480

3,422

0.66

752.9

Tana River

315,943

2,597

0.50

822.0

Lamu

143,920

1,913

0.37

1,329.2

1

0.00

tb_sex_wide %>%
  select(County, Female, Male, F_to_M_ratio) %>%
  filter(!is.na(F_to_M_ratio)) %>%   
  arrange(desc(F_to_M_ratio)) %>%
  mutate(
    Female = round(Female, 0),
    Male = round(Male, 0),
    F_to_M_ratio = round(F_to_M_ratio, 2)
  ) %>%
  kable(
    col.names = c("County", "Female TB cases", "Male TB cases", "F:M Ratio"),
    caption = "TB Cases by Gender and Female-to-Male Ratio by County (NAs removed)"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  )
TB Cases by Gender and Female-to-Male Ratio by County (NAs removed)
County Female TB cases Male TB cases F:M Ratio
Homa Bay 6233 7495 0.83
Siaya 6489 7983 0.81
Samburu 1825 2309 0.79
Marsabit 1573 2301 0.68
Nyamira 1967 2886 0.68
Tana River 1053 1544 0.68
Busia 2614 3948 0.66
Bungoma 4001 6149 0.65
Kisumu 5514 8490 0.65
Mandera 1517 2317 0.65
Kakamega 5396 8397 0.64
Turkana 6013 9385 0.64
Kilifi 4269 6833 0.62
Kwale 2198 3624 0.61
Migori 3784 6166 0.61
Pokot 3603 5880 0.61
Wajir 1314 2161 0.61
Isiolo 1404 2379 0.59
Kisii 4292 7294 0.59
Narok 3423 5770 0.59
Trans Nzoia 2632 4559 0.58
Nairobi 26993 47683 0.57
Bomet 3818 6798 0.56
Elgeyo Marakwet 1215 2207 0.55
Garissa 2002 3659 0.55
Kericho 4315 8016 0.54
Laikipia 2093 3873 0.54
Lamu 673 1240 0.54
Kajiado 3322 6269 0.53
Baringo 1663 3199 0.52
Uasin Gishu 3456 6604 0.52
Kiambu 9101 18292 0.50
Mombasa 8238 16566 0.50
Nakuru 6826 13769 0.50
Nandi 1559 3163 0.49
Taita Taveta 1158 2371 0.49
Tharaka Nithi 2135 4356 0.49
Nyandarua 1383 2885 0.48
Vihiga 1616 3538 0.46
Embu 2944 6845 0.43
Kitui 4299 10100 0.43
Machakos 4730 11081 0.43
Makueni 3424 7920 0.43
Nyeri 2608 6112 0.43
Meru 6938 17954 0.39
Murang’a 3561 9187 0.39
Kirinyaga 2271 5940 0.38
NA 0 1 0.00
year_counts <- dat5 %>%
  count(Year)

ggplot(year_counts, aes(x = Year, y = n)) +
  geom_line(size = 1.2) +
  geom_point(size = 2) +
  geom_vline(xintercept = 2020, linetype = "dashed") +
  annotate("text", x = 2020, y = max(year_counts$n),
           label = "COVID starts", hjust = -0.1) +
  theme_minimal() +
  labs(
    title = "Time Trend with COVID Period Marker",
    x = "Year",
    y = "Number of Records"
  )

tb_cases_covid <- dat5 %>%
  group_by(County, covid_period) %>%
  summarise(TB_cases = n(), .groups = "drop")
gender_county_presentable %>% flextable()

covid_period

County

Female

Male

F_to_M_ratio

Post-COVID

Baringo

927

1,624

0.57

Pre-COVID

Baringo

736

1,575

0.47

Post-COVID

Bomet

1,985

3,433

0.58

Pre-COVID

Bomet

1,833

3,365

0.54

Post-COVID

Bungoma

1,611

2,873

0.56

Pre-COVID

Bungoma

2,390

3,276

0.73

Post-COVID

Busia

1,054

1,756

0.60

Pre-COVID

Busia

1,560

2,192

0.71

Post-COVID

Elgeyo-Marakwet

564

1,096

0.51

Pre-COVID

Elgeyo-Marakwet

651

1,111

0.59

Post-COVID

Embu

1,501

3,408

0.44

Pre-COVID

Embu

1,443

3,437

0.42

Post-COVID

Garissa

955

1,738

0.55

Pre-COVID

Garissa

1,047

1,921

0.55

Post-COVID

Homa Bay

3,321

3,782

0.88

Pre-COVID

Homa Bay

2,912

3,713

0.78

Post-COVID

Isiolo

653

1,142

0.57

Pre-COVID

Isiolo

751

1,237

0.61

Post-COVID

Kajiado

1,534

3,125

0.49

Pre-COVID

Kajiado

1,788

3,144

0.57

Post-COVID

Kakamega

2,411

3,974

0.61

Pre-COVID

Kakamega

2,985

4,423

0.67

Post-COVID

Kericho

2,217

4,077

0.54

Pre-COVID

Kericho

2,098

3,939

0.53

Post-COVID

Kiambu

4,294

9,174

0.47

Pre-COVID

Kiambu

4,807

9,118

0.53

Post-COVID

Kilifi

1,792

3,125

0.57

Pre-COVID

Kilifi

2,477

3,708

0.67

Post-COVID

Kirinyaga

1,051

3,001

0.35

Pre-COVID

Kirinyaga

1,220

2,939

0.42

Post-COVID

Kisii

2,048

3,819

0.54

Pre-COVID

Kisii

2,244

3,475

0.65

Post-COVID

Kisumu

2,462

3,738

0.66

Pre-COVID

Kisumu

3,052

4,752

0.64

Post-COVID

Kitui

2,002

5,107

0.39

Pre-COVID

Kitui

2,297

4,993

0.46

Post-COVID

Kwale

1,017

1,775

0.57

Pre-COVID

Kwale

1,181

1,849

0.64

Post-COVID

Laikipia

920

1,775

0.52

Pre-COVID

Laikipia

1,173

2,098

0.56

Post-COVID

Lamu

339

635

0.53

Pre-COVID

Lamu

334

605

0.55

Post-COVID

Machakos

2,089

5,294

0.39

Pre-COVID

Machakos

2,641

5,787

0.46

Post-COVID

Makueni

1,508

3,665

0.41

Pre-COVID

Makueni

1,916

4,255

0.45

Post-COVID

Mandera

738

1,162

0.64

Pre-COVID

Mandera

779

1,155

0.67

Post-COVID

Marsabit

791

1,098

0.72

Pre-COVID

Marsabit

782

1,203

0.65

Post-COVID

Meru

3,136

8,725

0.36

Pre-COVID

Meru

3,802

9,229

0.41

Post-COVID

Migori

1,755

3,105

0.57

Pre-COVID

Migori

2,029

3,061

0.66

Post-COVID

Mombasa

4,127

8,359

0.49

Pre-COVID

Mombasa

4,111

8,207

0.50

Post-COVID

Murang'a

1,892

5,108

0.37

Pre-COVID

Murang'a

1,669

4,079

0.41

Post-COVID

Nairobi

12,486

22,480

0.56

Pre-COVID

Nairobi

14,507

25,203

0.58

Post-COVID

Nakuru

3,100

6,823

0.45

Pre-COVID

Nakuru

3,726

6,946

0.54

Post-COVID

Nandi

751

1,590

0.47

Pre-COVID

Nandi

808

1,573

0.51

Post-COVID

Narok

1,587

2,853

0.56

Pre-COVID

Narok

1,836

2,917

0.63

Post-COVID

Nyamira

1,018

1,584

0.64

Pre-COVID

Nyamira

949

1,302

0.73

Post-COVID

Nyandarua

593

1,347

0.44

Pre-COVID

Nyandarua

790

1,538

0.51

Post-COVID

Nyeri

1,312

3,121

0.42

Pre-COVID

Nyeri

1,296

2,991

0.43

Post-COVID

Samburu

910

1,176

0.77

Pre-COVID

Samburu

915

1,133

0.81

Post-COVID

Siaya

3,777

4,645

0.81

Pre-COVID

Siaya

2,712

3,338

0.81

Post-COVID

Taita Taveta

577

1,220

0.47

Pre-COVID

Taita Taveta

581

1,151

0.50

Post-COVID

Tana River

530

756

0.70

Pre-COVID

Tana River

523

788

0.66

Post-COVID

Tharaka-Nithi

926

2,109

0.44

Pre-COVID

Tharaka-Nithi

1,209

2,247

0.54

Post-COVID

Trans Nzoia

1,132

2,036

0.56

Pre-COVID

Trans Nzoia

1,500

2,523

0.59

Post-COVID

Turkana

3,159

5,023

0.63

Pre-COVID

Turkana

2,854

4,362

0.65

Post-COVID

Uasin Gishu

1,710

3,402

0.50

Pre-COVID

Uasin Gishu

1,746

3,202

0.55

Post-COVID

Vihiga

719

1,681

0.43

Pre-COVID

Vihiga

897

1,857

0.48

Post-COVID

Wajir

623

1,013

0.62

Pre-COVID

Wajir

691

1,148

0.60

Post-COVID

West Pokot

1,644

2,858

0.58

Pre-COVID

West Pokot

1,959

3,022

0.65

##Objective 2

tb_outcome_year_wide <- tb_outcome_year %>%
  select(Year, outcome, cases) %>%
  pivot_wider(
    names_from = outcome,
    values_from = cases,
    values_fill = 0
  )

tb_outcome_year_wide %>% flextable()

Year

Cured

Died

Failed

Lost to follow-up

Missing

Not evaluated

Transferred out

Treatment completed

2,017

31,228

4,956

273

4,238

1

375

3,745

34,152

2,018

36,195

6,430

430

5,501

0

502

1,878

47,231

2,019

36,351

5,618

515

5,090

0

380

1,283

40,923

2,020

32,613

5,379

450

4,165

0

609

896

32,406

2,021

34,345

5,044

488

4,154

0

541

468

36,402

2,022

38,330

5,495

626

4,539

0

1,096

552

45,060

ggplot(outcome_year, aes(x = factor(Year), y = cases, fill = outcome)) +
  geom_col(width = 0.8) +
  labs(
    title = "TB Treatment Outcomes by Year",
    x = "Year",
    y = "Number of Patients",
    fill = "Outcome"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

ggplot(success_year, aes(x = Year, y = success_rate)) +
  geom_line(size = 1.2, color = "#1E8449") +
  geom_point(size = 3, color = "#1E8449") +
  geom_hline(yintercept = 90, linetype = "dotted", color = "grey40") +
  labs(
    title = "TB Treatment Success Rate by Year",
    x = "Year",
    y = "Success Rate (%)"
  ) +
  theme_minimal(base_size = 13)

ggplot(covid_outcomes, aes(x = covid_period, y = percent, fill = outcome)) +
  geom_col(width = 0.6) +
  labs(
    title = "TB Treatment Outcomes: Pre- vs Post-COVID",
    x = "",
    y = "Percentage of Patients",
    fill = "Outcome"
  ) +
  theme_minimal(base_size = 13)

p_death <- ggplot(death_year, aes(x = Year, y = n)) +
  geom_line(size = 1.3, color = "#B03A2E") +
  geom_point(size = 3, color = "#B03A2E") +
  geom_vline(xintercept = 2020, linetype = "dashed", color = "grey40") +
  labs(
    title = "TB Mortality Trend by Year",
    x = "Year",
    y = "Number of Deaths"
  ) +
  theme_tb

p_death

age_gender_death %>% flextable()

age_group

Gender

deaths

total

mortality_rate

15-29

Female

6,973

104,849

6.7

15-29

Male

11,806

192,486

6.1

30-44

Female

1,755

26,341

6.7

30-44

Male

3,007

47,968

6.3

45-59

Female

568

8,655

6.6

45-59

Male

929

16,058

5.8

60+

Female

926

14,347

6.5

60+

Male

1,666

26,840

6.2

<15

Female

1,958

29,263

6.7

<15

Male

3,334

54,146

6.2

ggplot(age_gender_death, aes(x = age_group, y = mortality_rate, fill = Gender)) +
  geom_col(position = "dodge") +
  labs(
    title = "Mortality Rate by Age Group and Gender",
    x = "Age Group",
    y = "Mortality Rate (%)",
    fill = "Gender"
  ) +
  theme_minimal(base_size = 14)

ggplot(monthly_deaths, aes(x = month, y = deaths)) +
  geom_col(fill = "#C0392B") +
  geom_vline(xintercept = as.Date("2020-03-01"), linetype = "dashed", color = "grey40") +
  annotate("text", x = as.Date("2020-03-01"), y = max(monthly_deaths$deaths),
           label = "COVID onset", hjust = -0.1, vjust = -0.5, size = 4) +
  labs(
    title = "Monthly TB Deaths (Bar Plot)",
    x = "Month",
    y = "Number of Deaths"
  ) +
  theme_minimal(base_size = 14) +
  scale_x_date(date_labels = "%b-%Y", date_breaks = "2 months") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(monthly_deaths, aes(x = month, y = deaths)) +
  geom_line(color = "#C0392B", size = 1.2) +
  geom_point(color = "#C0392B", size = 2) +
  geom_vline(xintercept = as.Date("2020-03-01"), linetype = "dashed", color = "grey40") +
  annotate("text", x = as.Date("2020-03-01"), y = max(monthly_deaths$deaths),
           label = "COVID onset", hjust = -0.1, size = 4) +
  labs(
    title = "Monthly TB Deaths (Interrupted Time Series)",
    x = "Month",
    y = "Number of Deaths"
  ) +
  theme_minimal(base_size = 14)

##Case fatality per 100,000 TB cases

cfr_age_gender <- dat8 %>%
  group_by(age_group, Gender) %>%
  summarise(
    deaths = sum(died, na.rm = TRUE),
    tb_cases = n(),
    case_fatality_100k = round((deaths / tb_cases) * 100000, 1),
    .groups = "drop"
  )

cfr_age_gender %>% flextable()

age_group

Gender

deaths

tb_cases

case_fatality_100k

15-29

Female

6,973

104,849

6,650.5

15-29

Male

11,806

192,486

6,133.4

30-44

Female

1,755

26,341

6,662.6

30-44

Male

3,007

47,968

6,268.8

45-59

Female

568

8,655

6,562.7

45-59

Male

929

16,058

5,785.3

60+

Female

926

14,347

6,454.3

60+

Male

1,666

26,840

6,207.2

<15

Female

1,958

29,263

6,691.0

<15

Male

3,334

54,146

6,157.4

ggplot(cfr_age_gender,
       aes(x = age_group, y = case_fatality_100k, fill = Gender)) +
  geom_col(position = "dodge") +
  labs(
    title = "TB Case Fatality per 100,000 Cases",
    x = "Age Group",
    y = "Deaths per 100,000 TB Cases",
    fill = "Gender"
  ) +
  theme_minimal(base_size = 14)

TB Treatment Follow-up Outcomes Before, During, and After COVID-19

ggplot(tb_summary_period,
       aes(x = period, y = percent, fill = followup_outcome)) +
  geom_col(width = 0.7) +
  labs(
    title = "TB Treatment Follow-up Outcomes Before, During, and After COVID-19",
    x = "Period",
    y = "Percentage of patients",
    fill = "Outcome"
  ) +
  theme_minimal(base_size = 14)

completion_rate <- tb_outcomes_period %>%
  mutate(completed = if_else(`Treatment Outcome` %in% c("C", "TC"), 1, 0)) %>%
  group_by(period) %>%
  summarise(
    completed = sum(completed),
    total = n(),
    completion_rate = completed / total * 100,
    .groups = "drop"
  )

completion_rate %>% flextable()

period

completed

total

completion_rate

COVID

124,777

145,208

85.92984

Post-COVID

83,589

95,941

87.12542

Pre-COVID

236,870

279,804

84.65569

or_table_clean %>%
  kable(
    col.names = c("Variable", "Adjusted OR", "95% CI Lower", "95% CI Upper", "P-value"),
    caption = "Adjusted Odds Ratios for TB Treatment Completion (Univariate Model)"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  )
Adjusted Odds Ratios for TB Treatment Completion (Univariate Model)
Variable Adjusted OR 95% CI Lower 95% CI Upper P-value
Baseline (Pre-COVID, Female, <15 years) 6.11 6.02 6.20 0
Post-COVID vs Pre-COVID 1.11 1.08 1.13 0
periodPre-COVID 0.90 0.89 0.92 0
or_multiv_clean %>%
  mutate(
    estimate = round(estimate, 2),
    conf.low = round(conf.low, 2),
    conf.high = round(conf.high, 2),
    p.value = signif(p.value, 3)
  ) %>%
  select(term, estimate, conf.low, conf.high, p.value) %>%
  kable(
    col.names = c(
      "Variable",
      "Adjusted OR",
      "95% CI (Lower)",
      "95% CI (Upper)",
      "P-value"
    ),
    caption = "Adjusted Odds Ratios for TB Treatment Completion"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  )
Adjusted Odds Ratios for TB Treatment Completion
Variable Adjusted OR 95% CI (Lower) 95% CI (Upper) P-value
Baseline (Pre-COVID, Female, <15 years) 7.95 7.57 8.36 0.00000
COVID period vs Pre-COVID 1.12 1.10 1.14 0.00000
Post-COVID vs Pre-COVID 1.24 1.22 1.27 0.00000
Male vs Female 0.87 0.86 0.89 0.00000
Age 15–24 vs <15 1.08 1.02 1.14 0.00485
Age 25–44 vs <15 0.73 0.70 0.77 0.00000
Age 45–64 vs <15 0.68 0.64 0.71 0.00000
Age ≥65 vs <15 0.53 0.50 0.56 0.00000